library(vroom)
## Warning: package 'vroom' was built under R version 4.0.4
library(tidyverse)
## Registered S3 methods overwritten by 'readr':
## method from
## format.col_spec vroom
## print.col_spec vroom
## print.collector vroom
## print.date_names vroom
## print.locale vroom
## str.col_spec vroom
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.2 √ purrr 0.3.4
## √ tibble 3.0.4 √ dplyr 1.0.2
## √ tidyr 1.1.2 √ stringr 1.4.0
## √ readr 1.4.0 √ forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x readr::col_character() masks vroom::col_character()
## x readr::col_date() masks vroom::col_date()
## x readr::col_datetime() masks vroom::col_datetime()
## x readr::col_double() masks vroom::col_double()
## x readr::col_factor() masks vroom::col_factor()
## x readr::col_guess() masks vroom::col_guess()
## x readr::col_integer() masks vroom::col_integer()
## x readr::col_logical() masks vroom::col_logical()
## x readr::col_number() masks vroom::col_number()
## x readr::col_skip() masks vroom::col_skip()
## x readr::col_time() masks vroom::col_time()
## x readr::cols() masks vroom::cols()
## x readr::default_locale() masks vroom::default_locale()
## x dplyr::filter() masks stats::filter()
## x readr::fwf_empty() masks vroom::fwf_empty()
## x readr::fwf_positions() masks vroom::fwf_positions()
## x dplyr::lag() masks stats::lag()
## x readr::locale() masks vroom::locale()
## x readr::output_column() masks vroom::output_column()
## x readr::problems() masks vroom::problems()
library(janitor)
## Warning: package 'janitor' was built under R version 4.0.4
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
## The following object is masked from 'package:vroom':
##
## col_factor
Drive <- vroom("eng317.csv")
## New names:
## * `` -> ...5
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 27,100
## Columns: 39
## Delimiter: ","
## chr [ 7]: group, license plate number, date, log_gasoline_1, log_co2_1, log_acc_1, log_dea...
## dbl [31]: ranking, alert, age, gender, accident, driving experience, gasoline consumption,...
## lgl [ 1]: ...5
##
## Use `spec()` to retrieve the guessed column specification
## Pass a specification to the `col_types` argument to quiet this message
factors <- c("group", "ranking", "alert", "gender","accident", "group", "app_usage")
numerics <- c("log_co2_1", "log_acc_1", "log_gasoline_1", "log_deacc_1")
Drive_clean <- Drive %>%
janitor::clean_names() %>%# get rid of some `` and underscore problems
rename(license = license_plate_number) %>%
mutate(license = na_if(license, "1.22E+16"),
log_co2_1 = na_if(log_co2_1, "#NULL!"),
log_acc_1 = na_if(log_acc_1, "#NULL!"),
log_gasoline_1 = na_if(log_gasoline_1, "#NULL!"),
log_deacc_1 = na_if(log_deacc_1, "#NULL!")) %>%
filter(!is.na(license)) %>% # without license plate no car identifier
transform(driver_id = as.numeric(factor(license))) %>% # add id column
mutate(group = str_extract(group, '(?<=^.{1}).')) %>%
mutate(date = lubridate::mdy(date)) %>% # convert to Date type
mutate(across(all_of(factors), as.factor)) %>%
mutate(across(all_of(numerics), as.numeric)) %>%
relocate(where(is.numeric), .after = where(is.character)) %>%
relocate(starts_with("log"), .after = last_col()) %>%
relocate(driver_id, .before = 1) %>%
relocate(driving_score, .before = 2) %>% # research focus near the front
relocate(date, .after = 2) %>%
relocate(app_usage, .before = 2) # dependent variable at the front
## Warning in FUN(X[[i]], ...): strings not representable in native encoding will
## be translated to UTF-8
## Warning in FUN(X[[i]], ...): unable to translate '<U+00C4>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00D6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00E4>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00F6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00DF>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00C6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00E6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00D8>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00F8>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00C5>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00E5>' to native encoding
## Warning: Problem with `mutate()` input `date`.
## i 20038 failed to parse.
## i Input `date` is `lubridate::mdy(date)`.
## Warning: 20038 failed to parse.
Drive_Anhui <- Drive_clean %>%
filter(str_detect(license, "皖"))
plotly::ggplotly( # For some reasons doesn't knit, but executes fine
Drive_Anhui %>%
filter(mileage > 0) %>%
select(license, rapid_acceleration, rapid_deacceleration, date) %>%
mutate(month = month(date)) %>%
group_by(license) %>%
filter(n() > 50 & rapid_acceleration < 500) %>%
# at least 30 data points over the year
ggplot(aes(x=rapid_acceleration, y=rapid_deacceleration)) +
geom_point(mapping = aes(color = month)) +
geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs")) +
facet_wrap(~license) +
ggtitle("Threshold-measurement based profiling: Acceleration vs Deacceleration") +
xlab("Rapid acceleration penalty count") +
ylab("Rapid deacceleration penalty count")
)
#library("knitr")
#install.packages("devtools")
#install_github("plotly", "ropensci")
#library("devtools")
#library(plotly)
#p
Sys.getlocale()
## [1] "LC_COLLATE=Chinese (Simplified)_China.936;LC_CTYPE=Chinese (Simplified)_China.936;LC_MONETARY=Chinese (Simplified)_China.936;LC_NUMERIC=C;LC_TIME=Chinese (Simplified)_China.936"